Team rotation #1
library(tidyverse)
library(corrplot)
library(ggfortify)
library(reshape2)
library(cowplot)
library(viridis)
library(RColorBrewer)
library(ggfortify)
library(pals)
-Essential paper to read
-Very useful paper about phenocams and chromatic coordinates
-raw image data if interested
-metadata url
-GRSM PhenoCam URL -GRSM PhenoCam ROI URL with ROI mask file
-GRSM gcc data csv URL
-Site to download imagedata
rm(list=ls())
# Claire's Rserver WD
setwd("/home/claire/Git/PhenoRepo/")
site <- "GRSM"
# Individual data--data for every image taken
ind_data <- read_csv(paste0("data/",site,"/",site,"_individual.csv"))
ind_data <- ind_data[,colSums(is.na(ind_data))<nrow(ind_data)]
# head(ind_data)
# summary(ind_data)
# Daily data--data for images aggregated by day
daily_data <- read_csv(paste0("data/",site,"/",site,"_daily.csv")) %>%
filter(image_count>0)
daily_data <- daily_data[,colSums(is.na(daily_data))<nrow(daily_data)]
# head(daily_data)
# summary(daily_data)
# GDD data -- response variable
gcc_data <- read_csv(paste0("data/",site,"/",site,"_gccTargets.csv"))
# head(gcc_data)
# summary(gcc_data)
gcol <- 'springgreen4'
rcol <- 'sienna3'
bcol <- 'dodgerblue3'
doy_pal <- colorRampPalette(kovesi.cyclic_mrybm_35_75_c68(366))(366)
Individual data – looking at time series plots of data for individual photos
Histograms of rcc and gcc values
# names(ind_data)
hist(ind_data$gcc,breaks=50); abline(v=mean(ind_data$gcc,na.rm=T),col='green')
hist(ind_data$rcc,breaks=50); abline(v=mean(ind_data$gcc,na.rm=T),col='red')
GCC hist: Sort of multi-modal – one small peak around 0.10, very high peak around 0.35, and moderate but broader peak around 0.42. Lets try to match these peaks up with times of the year?
RCC: One non-normal peak around 0.37, slight left skew to data.
Correlation between the three color channels are in columns in the individual photo data set From meta data URL “correlation coefficient (across pixels) between red channel DN and green channel DN, over the ROI”
# histograms of correlation columns with mean as vertical red line
rgcorrmean <- mean(ind_data$r_g_correl,na.rm=T)
hist(ind_data$r_g_correl,main = paste("mean R-G corr = ", round(rgcorrmean,4))); abline(v=rgcorrmean,col='red')
gbcorrmean <- mean(ind_data$g_b_correl,na.rm=T)
hist(ind_data$g_b_correl,main = paste("mean G-B corr = ", round(gbcorrmean,4))); abline(v=gbcorrmean,col='red')
brcorrmean <- mean(ind_data$b_r_correl,na.rm=T)
hist(ind_data$b_r_correl,main = paste("mean B-R corr = ", round(brcorrmean,4))); abline(v=brcorrmean,col='red')
Of correlations between three color channels, highest is R-G (red green), then G-B, then B-R.
- Are there useful biological takeaways from this? What does it mean to say the G-B and are correlated? - This also doesn’t tell us pos. or neg. correlations. My intuition says that R is negative corr. with B and G, and G and B are positively correlated.
Plot of mean R, G, and B values against time.
#names(ind_data)
ind_data_melt <- melt(ind_data,id.vars = 'date',measure.vars = c('g_mean',"r_mean","b_mean"))
ggplot(ind_data_melt,aes(x=date,y=value))+
geom_point(aes(color=variable),alpha=0.05,size=1)+
facet_wrap(~variable)+
labs(x="Year", y="Mean channel pixel value")+
scale_color_manual(values=c(gcol,rcol,bcol))+
theme_classic()
## Warning: Removed 30 rows containing missing values (geom_point).
# names(ind_data)
ind_data_melt2 <- melt(ind_data,id.vars = 'solar_elev',measure.vars = c('gcc',"rcc"))
ggplot(ind_data_melt2,aes(x=solar_elev,y=value))+
geom_point(aes(color=variable),alpha=0.01)+
geom_smooth(method='lm')+
facet_wrap(~variable)+
scale_color_manual(values=c(gcol,rcol))+
theme_bw()
## Warning: Removed 20 rows containing non-finite values (stat_smooth).
## Warning: Removed 20 rows containing missing values (geom_point).
Sort of see 3 interesting groups in the green again, maybe related to the three peaks in the histogram
Daily data – Looking at time series plots of data for photos aggregated by day.
This is a plot of r mean, g mean, and b mean across time
-‘Mean’ explanation: “the mean value (for all images passing the selection criteria) of the mean (by image) red channel DN over the ROI”
- DN = digital number
- “Thus, each pixel in the image is associated with a digital number (“DN”) triplet, with each element in the triplet corresponding to the intensity of one of the colour layers. Therefore, the second step in the image analysis was to read in the images, and associated mask sequence, and to characterize the frequency distribution of the RGB DN triplets within the ROI. We did this separately for each ROI at each site, to produce the “all-image” data files contained in Data Record 3 (see below)."
# names(ind_data)
ggplot(daily_data,aes(x=date)) +
geom_point(aes(y=r_mean),color=rcol,alpha=0.5)+
geom_point(aes(y=g_mean),color=gcol,alpha=0.5)+
geom_point(aes(y=b_mean),color=bcol,alpha=0.5)+
geom_vline(xintercept = as.Date('2017-01-01'),color='grey50',alpha=0.5)+
geom_vline(xintercept = as.Date('2018-01-01'),color='grey50',alpha=0.5)+
geom_vline(xintercept = as.Date('2019-01-01'),color='grey50',alpha=0.5)+
geom_vline(xintercept = as.Date('2020-01-01'),color='grey50',alpha=0.5)+
geom_vline(xintercept = as.Date('2021-01-01'),color='grey50',alpha=0.5)+
labs(y="mean daily pixel value for each channel")+
theme_classic()
Plot gcc and rcc over time.
cc = chromatic coordinate = " the ratio of the amount of one primary color to the total amount of all three necessary to reproduce a given color."
ggplot(daily_data,aes(x=date))+
geom_vline(xintercept = as.Date('2017-01-01'),color='grey50',alpha=0.5)+
geom_vline(xintercept = as.Date('2018-01-01'),color='grey50',alpha=0.5)+
geom_vline(xintercept = as.Date('2019-01-01'),color='grey50',alpha=0.5)+
geom_vline(xintercept = as.Date('2020-01-01'),color='grey50',alpha=0.5)+
geom_vline(xintercept = as.Date('2021-01-01'),color='grey50',alpha=0.5)+
geom_point(aes(y=rcc_90),color=rcol,alpha=0.5)+
geom_point(aes(y=gcc_90),color=gcol,alpha=0.5)+
theme_classic()
Look more at gcc through time
gcc_df <- daily_data %>%
select(date,year,doy,
midday_gcc,
g_mean,
gcc_mean,gcc_std,
gcc_50,gcc_75,gcc_90)
ggplot(gcc_df,aes(x=date))+
geom_point(aes(y=gcc_90,color=as.factor(year)))+
theme_classic()
Maximum daily solar elevation vs gcc_90
# Look at max solar elev through time
mse_plot <- ggplot(daily_data,aes(x=date,y=max_solar_elev))+
geom_vline(xintercept = as.Date('2017-01-01'),color='grey50',alpha=0.5)+
geom_vline(xintercept = as.Date('2018-01-01'),color='grey50',alpha=0.5)+
geom_vline(xintercept = as.Date('2019-01-01'),color='grey50',alpha=0.5)+
geom_vline(xintercept = as.Date('2020-01-01'),color='grey50',alpha=0.5)+
geom_vline(xintercept = as.Date('2021-01-01'),color='grey50',alpha=0.5)+
geom_smooth()+
# geom_line()+ # Weird where missing values
theme_classic()
gcc_plot <- ggplot(daily_data,aes(x=date,y=gcc_90))+
geom_vline(xintercept = as.Date('2017-01-01'),color='grey50',alpha=0.5)+
geom_vline(xintercept = as.Date('2018-01-01'),color='grey50',alpha=0.5)+
geom_vline(xintercept = as.Date('2019-01-01'),color='grey50',alpha=0.5)+
geom_vline(xintercept = as.Date('2020-01-01'),color='grey50',alpha=0.5)+
geom_vline(xintercept = as.Date('2021-01-01'),color='grey50',alpha=0.5)+
geom_line(color=gcol)+
theme_classic()
plot_grid(mse_plot,gcc_plot,ncol = 1)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Try plotting together
ggplot(daily_data,aes(x=date))+
geom_vline(xintercept = as.Date('2017-01-01'),color='grey50',alpha=0.5)+
geom_vline(xintercept = as.Date('2018-01-01'),color='grey50',alpha=0.5)+
geom_vline(xintercept = as.Date('2019-01-01'),color='grey50',alpha=0.5)+
geom_vline(xintercept = as.Date('2020-01-01'),color='grey50',alpha=0.5)+
geom_vline(xintercept = as.Date('2021-01-01'),color='grey50',alpha=0.5)+
geom_point(aes(y=log(max_solar_elev)/10),size=0.5,color='goldenrod3')+
geom_line(aes(y=gcc_90),color=gcol)+
labs(y="",title="GCC_90 and log(solar elev)/10 vs. time",subtitle = "Max solar elev transformed to compare peaks more easily")+
# geom_line()+ # Weird where missing values
theme_classic()
ggplot(daily_data,aes(y=gcc_90,x=max_solar_elev))+
geom_point(aes(color=doy),alpha=0.5)+
scale_color_gradientn(colors=doy_pal)+
geom_smooth(method='lm')+
theme_classic()+
labs(color="Day of year")
## `geom_smooth()` using formula 'y ~ x'
summary(lm(gcc_90~max_solar_elev+doy,data = daily_data))
##
## Call:
## lm(formula = gcc_90 ~ max_solar_elev + doy, data = daily_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.042906 -0.010036 -0.001024 0.012223 0.093129
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.662e-01 1.804e-03 147.54 <2e-16 ***
## max_solar_elev 1.927e-03 2.736e-05 70.43 <2e-16 ***
## doy 8.425e-05 4.244e-06 19.85 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01709 on 1462 degrees of freedom
## Multiple R-squared: 0.7768, Adjusted R-squared: 0.7765
## F-statistic: 2544 on 2 and 1462 DF, p-value: < 2.2e-16
Correlation plots
names(daily_data)
## [1] "date" "year" "doy" "image_count"
## [5] "midday_filename" "midday_r" "midday_g" "midday_b"
## [9] "midday_gcc" "midday_rcc" "r_mean" "r_std"
## [13] "g_mean" "g_std" "b_mean" "b_std"
## [17] "gcc_mean" "gcc_std" "gcc_50" "gcc_75"
## [21] "gcc_90" "rcc_mean" "rcc_std" "rcc_50"
## [25] "rcc_75" "rcc_90" "max_solar_elev"
corr_df <- daily_data %>%
select(1,2,6:27)
summary(corr_df)
## date year midday_r midday_g
## Min. :2017-01-24 Min. :2017 Min. : 61.64 Min. : 58.72
## 1st Qu.:2018-03-02 1st Qu.:2018 1st Qu.: 94.28 1st Qu.: 95.15
## Median :2019-03-03 Median :2019 Median :103.49 Median :100.03
## Mean :2019-03-08 Mean :2019 Mean :102.83 Mean :100.75
## 3rd Qu.:2020-03-19 3rd Qu.:2020 3rd Qu.:111.79 3rd Qu.:106.69
## Max. :2021-03-20 Max. :2021 Max. :142.95 Max. :140.65
## midday_b midday_gcc midday_rcc r_mean
## Min. : 29.68 Min. :0.3209 Min. :0.2815 Min. : 81.36
## 1st Qu.: 51.21 1st Qu.:0.3476 1st Qu.:0.3659 1st Qu.: 92.16
## Median : 58.66 Median :0.3852 Median :0.3863 Median :101.86
## Mean : 60.88 Mean :0.3824 Mean :0.3887 Mean :101.46
## 3rd Qu.: 69.50 3rd Qu.:0.4153 3rd Qu.:0.4107 3rd Qu.:109.16
## Max. :109.42 Max. :0.4562 Max. :0.5345 Max. :130.96
## r_std g_mean g_std b_mean
## Min. : 0.000 Min. : 82.41 Min. : 0.000 Min. : 39.22
## 1st Qu.: 6.764 1st Qu.: 96.61 1st Qu.: 4.341 1st Qu.: 55.69
## Median :10.033 Median :100.93 Median : 6.015 Median : 61.72
## Mean :10.433 Mean :101.05 Mean : 6.179 Mean : 63.95
## 3rd Qu.:13.614 3rd Qu.:105.97 3rd Qu.: 7.861 3rd Qu.: 72.13
## Max. :29.049 Max. :118.85 Max. :15.559 Max. :110.77
## b_std gcc_mean gcc_std gcc_50
## Min. : 0.000 Min. :0.3257 Min. :0.000000 Min. :0.3246
## 1st Qu.: 5.253 1st Qu.:0.3475 1st Qu.:0.001500 1st Qu.:0.3475
## Median : 7.383 Median :0.3849 Median :0.004260 Median :0.3854
## Mean : 7.647 Mean :0.3807 Mean :0.004827 Mean :0.3812
## 3rd Qu.: 9.546 3rd Qu.:0.4111 3rd Qu.:0.007060 3rd Qu.:0.4121
## Max. :27.878 Max. :0.4459 Max. :0.028810 Max. :0.4491
## gcc_75 gcc_90 rcc_mean rcc_std
## Min. :0.3275 Min. :0.3291 Min. :0.3149 Min. :0.00000
## 1st Qu.:0.3482 1st Qu.:0.3488 1st Qu.:0.3566 1st Qu.:0.01588
## Median :0.3898 Median :0.3924 Median :0.3782 Median :0.02340
## Mean :0.3837 Mean :0.3856 Mean :0.3800 Mean :0.02542
## 3rd Qu.:0.4159 3rd Qu.:0.4190 3rd Qu.:0.3989 3rd Qu.:0.03432
## Max. :0.4539 Max. :0.4557 Max. :0.5044 Max. :0.06922
## rcc_50 rcc_75 rcc_90 max_solar_elev
## Min. :0.3063 Min. :0.3198 Min. :0.3220 Min. :12.14
## 1st Qu.:0.3630 1st Qu.:0.3721 1st Qu.:0.3777 1st Qu.:38.10
## Median :0.3829 Median :0.3920 Median :0.3995 Median :53.55
## Mean :0.3866 Mean :0.3951 Mean :0.4013 Mean :54.11
## 3rd Qu.:0.4087 3rd Qu.:0.4150 3rd Qu.:0.4202 3rd Qu.:70.63
## Max. :0.5241 Max. :0.5324 Max. :0.5346 Max. :77.69
corrplot(cor(corr_df[,2:ncol(corr_df)]))
PCA Plots
# names(daily_data)
pca_df <- daily_data %>%
dplyr::select(year,doy,
# midday_r,midday_g,midday_b,
# r_mean,g_mean,b_mean,
# gcc_mean,rcc_mean,
gcc_90,rcc_90,
max_solar_elev)
pca_res <- prcomp(pca_df, scale. = TRUE)
autoplot(pca_res, data = daily_data,
colour = 'year',
# colour='grey50',
loadings = TRUE, loadings.label=TRUE,
loadings.colour='grey50',loadings.label.colour='darkred',
loadings.label.size = 5,alpha = 0.5)+
theme_classic()+
scale_color_viridis(option = "D")
## Warning: `select_()` was deprecated in dplyr 0.7.0.
## Please use `select()` instead.
PCs <- as.data.frame(pca_res$x)
PCs12_df <- cbind(PC1=PCs$PC1,PC2=PCs$PC2, daily_data)
# summary(PCs12_df)
ggplot(PCs12_df,aes(x=PC1,y=gcc_90))+
geom_point(aes(color=doy),alpha=0.5)+
scale_color_gradientn(colors=doy_pal)+
geom_smooth(method="lm",se=T)+
# geom_smooth(se=FALSE,color='red')+
theme(legend.position = "none")+
theme_classic()
## `geom_smooth()` using formula 'y ~ x'
https://www.sciencedirect.com/science/article/pii/S0378112716302298